{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Ejercicio misceláneo: Análisis exploratorio y Feature Engineering" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "from scipy import stats" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [], "source": [ "ruta_archivo = os.path.join(\"titanic\", \"train.csv\")\n", "df = pd.read_csv(os.path.join(\"titanic\", \"train.csv\"), index_col='PassengerId')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
PassengerId
103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " Survived Pclass \\\n", "PassengerId \n", "1 0 3 \n", "2 1 1 \n", "3 1 3 \n", "4 1 1 \n", "5 0 3 \n", "\n", " Name Sex Age \\\n", "PassengerId \n", "1 Braund, Mr. Owen Harris male 22.0 \n", "2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n", "3 Heikkinen, Miss. Laina female 26.0 \n", "4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n", "5 Allen, Mr. William Henry male 35.0 \n", "\n", " SibSp Parch Ticket Fare Cabin Embarked \n", "PassengerId \n", "1 1 0 A/5 21171 7.2500 NaN S \n", "2 1 0 PC 17599 71.2833 C85 C \n", "3 0 0 STON/O2. 3101282 7.9250 NaN S \n", "4 1 0 113803 53.1000 C123 S \n", "5 0 0 373450 8.0500 NaN S " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Elimine las variables/columnas 'Ticket' y 'Cabin'" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarked
PassengerId
103Braund, Mr. Owen Harrismale22.0107.2500S
211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.01071.2833C
313Heikkinen, Miss. Lainafemale26.0007.9250S
411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01053.1000S
503Allen, Mr. William Henrymale35.0008.0500S
\n", "
" ], "text/plain": [ " Survived Pclass \\\n", "PassengerId \n", "1 0 3 \n", "2 1 1 \n", "3 1 3 \n", "4 1 1 \n", "5 0 3 \n", "\n", " Name Sex Age \\\n", "PassengerId \n", "1 Braund, Mr. Owen Harris male 22.0 \n", "2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n", "3 Heikkinen, Miss. Laina female 26.0 \n", "4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n", "5 Allen, Mr. William Henry male 35.0 \n", "\n", " SibSp Parch Fare Embarked \n", "PassengerId \n", "1 1 0 7.2500 S \n", "2 1 0 71.2833 C \n", "3 0 0 7.9250 S \n", "4 1 0 53.1000 S \n", "5 0 0 8.0500 S " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.drop(columns=['Cabin'], inplace=True)\n", "df.drop(columns=['Ticket'], inplace=True)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarked
count892.000000892.000000892892715.000000892.000000892.000000891.000000890
uniqueNaNNaN8922NaNNaNNaNNaN3
topNaNNaNJonsson, Mr. CarlmaleNaNNaNNaNNaNS
freqNaNNaN1578NaNNaNNaNNaN645
mean0.3834082.309417NaNNaN29.7421960.5224220.38116632.204208NaN
std0.4864890.835923NaNNaN14.5619511.1022640.80570649.693429NaN
min0.0000001.000000NaNNaN0.4200000.0000000.0000000.000000NaN
25%0.0000002.000000NaNNaN20.2500000.0000000.0000007.910400NaN
50%0.0000003.000000NaNNaN28.0000000.0000000.00000014.454200NaN
75%1.0000003.000000NaNNaN38.0000001.0000000.00000031.000000NaN
max1.0000003.000000NaNNaN80.0000008.0000006.000000512.329200NaN
\n", "
" ], "text/plain": [ " Survived Pclass Name Sex Age \\\n", "count 892.000000 892.000000 892 892 715.000000 \n", "unique NaN NaN 892 2 NaN \n", "top NaN NaN Jonsson, Mr. Carl male NaN \n", "freq NaN NaN 1 578 NaN \n", "mean 0.383408 2.309417 NaN NaN 29.742196 \n", "std 0.486489 0.835923 NaN NaN 14.561951 \n", "min 0.000000 1.000000 NaN NaN 0.420000 \n", "25% 0.000000 2.000000 NaN NaN 20.250000 \n", "50% 0.000000 3.000000 NaN NaN 28.000000 \n", "75% 1.000000 3.000000 NaN NaN 38.000000 \n", "max 1.000000 3.000000 NaN NaN 80.000000 \n", "\n", " SibSp Parch Fare Embarked \n", "count 892.000000 892.000000 891.000000 890 \n", "unique NaN NaN NaN 3 \n", "top NaN NaN NaN S \n", "freq NaN NaN NaN 645 \n", "mean 0.522422 0.381166 32.204208 NaN \n", "std 1.102264 0.805706 49.693429 NaN \n", "min 0.000000 0.000000 0.000000 NaN \n", "25% 0.000000 0.000000 7.910400 NaN \n", "50% 0.000000 0.000000 14.454200 NaN \n", "75% 1.000000 0.000000 31.000000 NaN \n", "max 8.000000 6.000000 512.329200 NaN " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Encuentre los mejores valores para completar la variable 'Embarked' para los pasajeros con datos faltantes" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitleIsMother
PassengerId
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Survived, Pclass, Name, Sex, Age, SibSp, Parch, Fare, Embarked, Title, IsMother]\n", "Index: []" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.Embarked.isnull()]" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "S 645\n", "C 168\n", "Q 77\n", "Name: Embarked, dtype: int64" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Tome en cuenta que simplemente emplear el valor que más aparece casi nunca es la mejor alternativa\n", "#Para obtener mejores resultados deberá apoyarse en los resultados de un análisis exploratorio de datos\n", "#aplicado a las variable(s) que puedan estar relacionadas con 'Embarked'\n", "df['Embarked'].value_counts()" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pclass Sex \n", "1 female S\n", " male S\n", "2 female S\n", " male S\n", "3 female S\n", " male S\n", "Name: Embarked, dtype: object" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Obtenemos el puerto más común según la clase y el sexo\n", "df.groupby(['Pclass', 'Sex'])['Embarked'].agg(pd.Series.mode)" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitleIsMother
PassengerId
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Survived, Pclass, Name, Sex, Age, SibSp, Parch, Fare, Embarked, Title, IsMother]\n", "Index: []" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Como podemos observar, todos los valores que se obtuvieron en la celda de arriba pertenecen al puerto 'S'\n", "# Entonces, reemplazaremos los valores NaN por el puerto 'S'\n", "df['Embarked'].fillna('S', inplace=True)\n", "df[df.Embarked.isnull()]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Complete el código de la siguiente función para extraer el título de cada pasajero" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "def extractTitle(name):\n", " title_mapping = {'mr' : 'Mr', \n", " 'mrs' : 'Mrs', \n", " 'miss' : 'Miss', \n", " 'master' : 'Master',\n", " 'don' : 'Sir',\n", " 'rev' : 'Sir',\n", " 'dr' : 'Officer',\n", " 'mme' : 'Mrs',\n", " 'ms' : 'Mrs',\n", " 'major' : 'Officer',\n", " 'lady' : 'Lady',\n", " 'sir' : 'Sir',\n", " 'mlle' : 'Miss',\n", " 'col' : 'Officer',\n", " 'capt' : 'Officer',\n", " 'the countess' : 'Lady',\n", " 'jonkheer' : 'Sir',\n", " 'dona' : 'Lady'\n", " }\n", " title = name.split(', ')[1].split('.')[0].lower()\n", " return title_mapping[title]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitle
PassengerId
103Braund, Mr. Owen Harrismale22.0107.2500SMr
211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.01071.2833CMrs
313Heikkinen, Miss. Lainafemale26.0007.9250SMiss
411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01053.1000SMrs
503Allen, Mr. William Henrymale35.0008.0500SMr
\n", "
" ], "text/plain": [ " Survived Pclass \\\n", "PassengerId \n", "1 0 3 \n", "2 1 1 \n", "3 1 3 \n", "4 1 1 \n", "5 0 3 \n", "\n", " Name Sex Age \\\n", "PassengerId \n", "1 Braund, Mr. Owen Harris male 22.0 \n", "2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n", "3 Heikkinen, Miss. Laina female 26.0 \n", "4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n", "5 Allen, Mr. William Henry male 35.0 \n", "\n", " SibSp Parch Fare Embarked Title \n", "PassengerId \n", "1 1 0 7.2500 S Mr \n", "2 1 0 71.2833 C Mrs \n", "3 0 0 7.9250 S Miss \n", "4 1 0 53.1000 S Mrs \n", "5 0 0 8.0500 S Mr " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Title'] = df['Name'].map(lambda name : extractTitle(name))\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitle
count892.000000892.000000892892715.000000892.000000892.000000891.000000890892
uniqueNaNNaN8922NaNNaNNaNNaN37
topNaNNaNSimmons, Mr. JohnmaleNaNNaNNaNNaNSMr
freqNaNNaN1578NaNNaNNaNNaN645518
mean0.3834082.309417NaNNaN29.7421960.5224220.38116632.204208NaNNaN
std0.4864890.835923NaNNaN14.5619511.1022640.80570649.693429NaNNaN
min0.0000001.000000NaNNaN0.4200000.0000000.0000000.000000NaNNaN
25%0.0000002.000000NaNNaN20.2500000.0000000.0000007.910400NaNNaN
50%0.0000003.000000NaNNaN28.0000000.0000000.00000014.454200NaNNaN
75%1.0000003.000000NaNNaN38.0000001.0000000.00000031.000000NaNNaN
max1.0000003.000000NaNNaN80.0000008.0000006.000000512.329200NaNNaN
\n", "
" ], "text/plain": [ " Survived Pclass Name Sex Age \\\n", "count 892.000000 892.000000 892 892 715.000000 \n", "unique NaN NaN 892 2 NaN \n", "top NaN NaN Simmons, Mr. John male NaN \n", "freq NaN NaN 1 578 NaN \n", "mean 0.383408 2.309417 NaN NaN 29.742196 \n", "std 0.486489 0.835923 NaN NaN 14.561951 \n", "min 0.000000 1.000000 NaN NaN 0.420000 \n", "25% 0.000000 2.000000 NaN NaN 20.250000 \n", "50% 0.000000 3.000000 NaN NaN 28.000000 \n", "75% 1.000000 3.000000 NaN NaN 38.000000 \n", "max 1.000000 3.000000 NaN NaN 80.000000 \n", "\n", " SibSp Parch Fare Embarked Title \n", "count 892.000000 892.000000 891.000000 890 892 \n", "unique NaN NaN NaN 3 7 \n", "top NaN NaN NaN S Mr \n", "freq NaN NaN NaN 645 518 \n", "mean 0.522422 0.381166 32.204208 NaN NaN \n", "std 1.102264 0.805706 49.693429 NaN NaN \n", "min 0.000000 0.000000 0.000000 NaN NaN \n", "25% 0.000000 0.000000 7.910400 NaN NaN \n", "50% 0.000000 0.000000 14.454200 NaN NaN \n", "75% 1.000000 0.000000 31.000000 NaN NaN \n", "max 8.000000 6.000000 512.329200 NaN NaN " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Encuentre los mejores valores para completar la variable 'Age' para los pasajeros con datos faltantes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tome en cuenta que simplemente emplear el valor de tendencia central (mediana) de todo el conjunto casi nunca es la mejor alternativa\n", "Para obtener mejores resultados deberá apoyarse en los resultados de un análisis exploratorio de datos\n", "aplicado a las variable(s) que puedan estar relacionadas con 'Age'" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitle
PassengerId
603Moran, Mr. JamesmaleNaN008.4583QMr
1812Williams, Mr. Charles EugenemaleNaN0013.0000SMr
2013Masselmani, Mrs. FatimafemaleNaN007.2250CMrs
2703Emir, Mr. Farred ChehabmaleNaN007.2250CMr
2913O'Dwyer, Miss. Ellen \"Nellie\"femaleNaN007.8792QMiss
\n", "
" ], "text/plain": [ " Survived Pclass Name Sex Age \\\n", "PassengerId \n", "6 0 3 Moran, Mr. James male NaN \n", "18 1 2 Williams, Mr. Charles Eugene male NaN \n", "20 1 3 Masselmani, Mrs. Fatima female NaN \n", "27 0 3 Emir, Mr. Farred Chehab male NaN \n", "29 1 3 O'Dwyer, Miss. Ellen \"Nellie\" female NaN \n", "\n", " SibSp Parch Fare Embarked Title \n", "PassengerId \n", "6 0 0 8.4583 Q Mr \n", "18 0 0 13.0000 S Mr \n", "20 0 0 7.2250 C Mrs \n", "27 0 0 7.2250 C Mr \n", "29 0 0 7.8792 Q Miss " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['Age'].isnull()].head()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(177, 10)" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['Age'].isnull()].shape" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "28.0" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Age'].median()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassAgeSibSpParchFare
Survived1.000000-0.338965-0.079388-0.0348860.0820100.257307
Pclass-0.3389651.000000-0.3651610.0825990.017994-0.549500
Age-0.079388-0.3651611.000000-0.308847-0.1899890.096067
SibSp-0.0348860.082599-0.3088471.0000000.4149850.159651
Parch0.0820100.017994-0.1899890.4149851.0000000.216225
Fare0.257307-0.5495000.0960670.1596510.2162251.000000
\n", "
" ], "text/plain": [ " Survived Pclass Age SibSp Parch Fare\n", "Survived 1.000000 -0.338965 -0.079388 -0.034886 0.082010 0.257307\n", "Pclass -0.338965 1.000000 -0.365161 0.082599 0.017994 -0.549500\n", "Age -0.079388 -0.365161 1.000000 -0.308847 -0.189989 0.096067\n", "SibSp -0.034886 0.082599 -0.308847 1.000000 0.414985 0.159651\n", "Parch 0.082010 0.017994 -0.189989 0.414985 1.000000 0.216225\n", "Fare 0.257307 -0.549500 0.096067 0.159651 0.216225 1.000000" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.corr()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Según la matriz de correlación, *Age* tiene la correlación más alta con *Pclass*. Entonces, agruparemos los datos según este parámetro. También los agruparemos según el sexo, o sea, que los datos estarán agrupados por clase y por sexo. Porque las edades de las personas pueden variar según su sexo." ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "pasajeros_por_clase_y_sexo = df.groupby(['Pclass', 'Sex'])\n", "mediana_age_pasajeros = pasajeros_por_clase_y_sexo['Age'].transform('median')" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitle
PassengerId
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Survived, Pclass, Name, Sex, Age, SibSp, Parch, Fare, Embarked, Title]\n", "Index: []" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reemplazar los datos nulos de Age\n", "df['Age'].fillna(mediana_age_pasajeros, inplace=True)\n", "\n", "df[df['Age'].isnull()].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. Cree una nueva variable 'IsMother' (1=es madre, 0=no es madre) \n", "\n", "#### Responda la siguiente pregunta sustentando su respuesta con los resultados de un análisis exploratorio de datos.\n", "#### Las madres tuvieron mayor probabilidad de sobrevivir al accidente del Titanic?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Elegimos a las madres como las mujeres que están casadas y que viajen al menos con un Parch(parientes que sean padres o hijos).\n", "\n", "No incluímos a las mujeres con el título de Miss, para evitar incluir a las mujeres que no son madres pero que \n", "viajan con sus papás.\n", "\n", "Hay que tener en cuenta que en esa época no habían muchas madres solteras (por el machismo), así que no se\n", "pierde mucha información en caso de que una mujer sea madre y no esté casada." ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitleIsMother
PassengerId
88811Graham, Miss. Margaret Edithfemale19.00030.00SMiss0
88903Johnston, Miss. Catherine Helen \"Carrie\"female21.51223.45SMiss0
89011Behr, Mr. Karl Howellmale26.00030.00CMr0
89103Dooley, Mr. Patrickmale32.0007.75QMr0
104403Storey, Mr. Thomasmale60.500NaNSMr0
\n", "
" ], "text/plain": [ " Survived Pclass Name \\\n", "PassengerId \n", "888 1 1 Graham, Miss. Margaret Edith \n", "889 0 3 Johnston, Miss. Catherine Helen \"Carrie\" \n", "890 1 1 Behr, Mr. Karl Howell \n", "891 0 3 Dooley, Mr. Patrick \n", "1044 0 3 Storey, Mr. Thomas \n", "\n", " Sex Age SibSp Parch Fare Embarked Title IsMother \n", "PassengerId \n", "888 female 19.0 0 0 30.00 S Miss 0 \n", "889 female 21.5 1 2 23.45 S Miss 0 \n", "890 male 26.0 0 0 30.00 C Mr 0 \n", "891 male 32.0 0 0 7.75 Q Mr 0 \n", "1044 male 60.5 0 0 NaN S Mr 0 " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def es_mujer_y_no_soltera_y_viaja_con_hijos(fila):\n", " return (fila['Parch'] > 0) & (fila['Sex'] == 'female') & (fila['Title'] != 'Miss')\n", "\n", "def es_madre(fila):\n", " if es_mujer_y_no_soltera_y_viaja_con_hijos(fila):\n", " return 1\n", " return 0\n", "\n", "df['IsMother'] = df.apply(es_madre, axis=1)\n", "df.tail()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchFareEmbarkedTitleIsMother
PassengerId
913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00211.1333SMrs1
2613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01531.3875SMrs1
9912Doling, Mrs. John T (Ada Julia Bone)female34.00123.0000SMrs1
14103Boulos, Mrs. Joseph (Sultana)female21.50215.2458CMrs1
16711Chibnall, Mrs. (Edith Martha Bowerman)female35.00155.0000SMrs1
16803Skoog, Mrs. William (Anna Bernhardina Karlsson)female45.01427.9000SMrs1
24812Hamalainen, Mrs. William (Anna)female24.00214.5000SMrs1
25203Strom, Mrs. Wilhelm (Elna Matilda Persson)female29.01110.4625SMrs1
25503Rosblom, Mrs. Viktor (Helena Wilhelmina)female41.00220.2125SMrs1
25613Touma, Mrs. Darwis (Hanne Youssef Razi)female29.00215.2458CMrs1
26012Parrish, Mrs. (Lutie Davis)female50.00126.0000SMrs1
26911Graham, Mrs. William Thompson (Edith Junkins)female58.001153.4625SMrs1
27312Mellinger, Mrs. (Elizabeth Anne Maidment)female41.00119.5000SMrs1
28013Abbott, Mrs. Stanton (Rosa Hunt)female35.01120.2500SMrs1
30011Baxter, Mrs. James (Helene DeLaudeniere Chaput)female50.001247.5208CMrs1
31302Lahtinen, Mrs. William (Anna Sylfven)female26.01126.0000SMrs1
32011Spedden, Mrs. Frederic Oakley (Margaretta Corn...female40.011134.5000CMrs1
32412Caldwell, Mrs. Albert Francis (Sylvia Mae Harb...female22.01129.0000SMrs1
32913Goldsmith, Mrs. Frank John (Emily Alice Brown)female31.01120.5250SMrs1
36303Barbara, Mrs. (Catherine David)female45.00114.4542CMrs1
39513Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengt...female24.00216.7000SMrs1
41712Drew, Mrs. James Vivian (Lulu Thorne Christian)female34.01132.5000SMrs1
42403Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria ...female28.01114.4000SMrs1
43812Richards, Mrs. Sidney (Emily Hocking)female24.02318.7500SMrs1
44112Hart, Mrs. Benjamin (Esther Ada Bloomfield)female45.01126.2500SMrs1
47312West, Mrs. Edwy Arthur (Ada Mary Worth)female33.01227.7500SMrs1
49901Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25.012151.5500SMrs1
50712Quick, Mrs. Frederick Charles (Jane Richards)female33.00226.0000SMrs1
52411Hippach, Mrs. Louis Albert (Ida Sophia Fischer)female44.00157.9792CMrs1
53413Peter, Mrs. Catherine (Catherine Rizk)female21.50222.3583CMrs1
55911Taussig, Mrs. Emil (Tillie Mandelbaum)female39.01179.6500SMrs1
56803Palsson, Mrs. Nils (Alma Cornelia Berglund)female29.00421.0750SMrs1
58211Thayer, Mrs. John Borland (Marian Longstreth M...female39.011110.8833CMrs1
60112Jacobsohn, Mrs. Sidney Samuel (Amy Frances Chr...female24.02127.0000SMrs1
60912Laroche, Mrs. Joseph (Juliette Marie Louise La...female22.01241.5792CMrs1
61103Andersson, Mrs. Anders Johan (Alfrida Konstant...female39.01531.2750SMrs1
63903Panula, Mrs. Juha (Maria Emilia Ojala)female41.00539.6875SMrs1
65803Bourke, Mrs. John (Catherine)female32.01115.5000QMrs1
67112Brown, Mrs. Thomas William Solomon (Elizabeth ...female40.01139.0000SMrs1
67903Goodwin, Mrs. Frederick (Augusta Tyler)female43.01646.9000SMrs1
73703Ford, Mrs. Edward (Margaret Ann Watson)female48.01334.3750SMrs1
75512Herman, Mrs. Samuel (Jane Laver)female48.01265.0000SMrs1
76411Carter, Mrs. William Ernest (Lucile Polk)female36.012120.0000SMrs1
77512Hocking, Mrs. Elizabeth (Eliza Needs)female54.01323.0000SMrs1
78011Robert, Mrs. Edward Scott (Elisabeth Walton Mc...female43.001211.3375SMrs1
80003Van Impe, Mrs. Jean Baptiste (Rosalie Paula Go...female30.01124.1500SMrs1
80212Collyer, Mrs. Harvey (Charlotte Annie Tate)female31.01126.2500SMrs1
82111Hays, Mrs. Charles Melville (Clara Jennings Gr...female52.01193.5000SMrs1
82413Moor, Mrs. (Beila)female27.00112.4750SMrs1
85613Aks, Mrs. Sam (Leah Rosen)female18.0019.3500SMrs1
85711Wick, Mrs. George Dennick (Mary Hitchcock)female45.011164.8667SMrs1
85913Baclini, Mrs. Solomon (Latifa Qurban)female24.00319.2583CMrs1
87211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.01152.5542SMrs1
88011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.00183.1583CMrs1
88112Shelley, Mrs. William (Imanita Parrish Hall)female25.00126.0000SMrs1
88603Rice, Mrs. William (Margaret Norton)female39.00529.1250QMrs1
\n", "
" ], "text/plain": [ " Survived Pclass \\\n", "PassengerId \n", "9 1 3 \n", "26 1 3 \n", "99 1 2 \n", "141 0 3 \n", "167 1 1 \n", "168 0 3 \n", "248 1 2 \n", "252 0 3 \n", "255 0 3 \n", "256 1 3 \n", "260 1 2 \n", "269 1 1 \n", "273 1 2 \n", "280 1 3 \n", "300 1 1 \n", "313 0 2 \n", "320 1 1 \n", "324 1 2 \n", "329 1 3 \n", "363 0 3 \n", "395 1 3 \n", "417 1 2 \n", "424 0 3 \n", "438 1 2 \n", "441 1 2 \n", "473 1 2 \n", "499 0 1 \n", "507 1 2 \n", "524 1 1 \n", "534 1 3 \n", "559 1 1 \n", "568 0 3 \n", "582 1 1 \n", "601 1 2 \n", "609 1 2 \n", "611 0 3 \n", "639 0 3 \n", "658 0 3 \n", "671 1 2 \n", "679 0 3 \n", "737 0 3 \n", "755 1 2 \n", "764 1 1 \n", "775 1 2 \n", "780 1 1 \n", "800 0 3 \n", "802 1 2 \n", "821 1 1 \n", "824 1 3 \n", "856 1 3 \n", "857 1 1 \n", "859 1 3 \n", "872 1 1 \n", "880 1 1 \n", "881 1 2 \n", "886 0 3 \n", "\n", " Name Sex Age \\\n", "PassengerId \n", "9 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 \n", "26 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 \n", "99 Doling, Mrs. John T (Ada Julia Bone) female 34.0 \n", "141 Boulos, Mrs. Joseph (Sultana) female 21.5 \n", "167 Chibnall, Mrs. (Edith Martha Bowerman) female 35.0 \n", "168 Skoog, Mrs. William (Anna Bernhardina Karlsson) female 45.0 \n", "248 Hamalainen, Mrs. William (Anna) female 24.0 \n", "252 Strom, Mrs. Wilhelm (Elna Matilda Persson) female 29.0 \n", "255 Rosblom, Mrs. Viktor (Helena Wilhelmina) female 41.0 \n", "256 Touma, Mrs. Darwis (Hanne Youssef Razi) female 29.0 \n", "260 Parrish, Mrs. (Lutie Davis) female 50.0 \n", "269 Graham, Mrs. William Thompson (Edith Junkins) female 58.0 \n", "273 Mellinger, Mrs. (Elizabeth Anne Maidment) female 41.0 \n", "280 Abbott, Mrs. Stanton (Rosa Hunt) female 35.0 \n", "300 Baxter, Mrs. James (Helene DeLaudeniere Chaput) female 50.0 \n", "313 Lahtinen, Mrs. William (Anna Sylfven) female 26.0 \n", "320 Spedden, Mrs. Frederic Oakley (Margaretta Corn... female 40.0 \n", "324 Caldwell, Mrs. Albert Francis (Sylvia Mae Harb... female 22.0 \n", "329 Goldsmith, Mrs. Frank John (Emily Alice Brown) female 31.0 \n", "363 Barbara, Mrs. (Catherine David) female 45.0 \n", "395 Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengt... female 24.0 \n", "417 Drew, Mrs. James Vivian (Lulu Thorne Christian) female 34.0 \n", "424 Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria ... female 28.0 \n", "438 Richards, Mrs. Sidney (Emily Hocking) female 24.0 \n", "441 Hart, Mrs. Benjamin (Esther Ada Bloomfield) female 45.0 \n", "473 West, Mrs. Edwy Arthur (Ada Mary Worth) female 33.0 \n", "499 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0 \n", "507 Quick, Mrs. Frederick Charles (Jane Richards) female 33.0 \n", "524 Hippach, Mrs. Louis Albert (Ida Sophia Fischer) female 44.0 \n", "534 Peter, Mrs. Catherine (Catherine Rizk) female 21.5 \n", "559 Taussig, Mrs. Emil (Tillie Mandelbaum) female 39.0 \n", "568 Palsson, Mrs. Nils (Alma Cornelia Berglund) female 29.0 \n", "582 Thayer, Mrs. John Borland (Marian Longstreth M... female 39.0 \n", "601 Jacobsohn, Mrs. Sidney Samuel (Amy Frances Chr... female 24.0 \n", "609 Laroche, Mrs. Joseph (Juliette Marie Louise La... female 22.0 \n", "611 Andersson, Mrs. Anders Johan (Alfrida Konstant... female 39.0 \n", "639 Panula, Mrs. Juha (Maria Emilia Ojala) female 41.0 \n", "658 Bourke, Mrs. John (Catherine) female 32.0 \n", "671 Brown, Mrs. Thomas William Solomon (Elizabeth ... female 40.0 \n", "679 Goodwin, Mrs. Frederick (Augusta Tyler) female 43.0 \n", "737 Ford, Mrs. Edward (Margaret Ann Watson) female 48.0 \n", "755 Herman, Mrs. Samuel (Jane Laver) female 48.0 \n", "764 Carter, Mrs. William Ernest (Lucile Polk) female 36.0 \n", "775 Hocking, Mrs. Elizabeth (Eliza Needs) female 54.0 \n", "780 Robert, Mrs. Edward Scott (Elisabeth Walton Mc... female 43.0 \n", "800 Van Impe, Mrs. Jean Baptiste (Rosalie Paula Go... female 30.0 \n", "802 Collyer, Mrs. Harvey (Charlotte Annie Tate) female 31.0 \n", "821 Hays, Mrs. Charles Melville (Clara Jennings Gr... female 52.0 \n", "824 Moor, Mrs. (Beila) female 27.0 \n", "856 Aks, Mrs. Sam (Leah Rosen) female 18.0 \n", "857 Wick, Mrs. George Dennick (Mary Hitchcock) female 45.0 \n", "859 Baclini, Mrs. Solomon (Latifa Qurban) female 24.0 \n", "872 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 \n", "880 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 \n", "881 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 \n", "886 Rice, Mrs. William (Margaret Norton) female 39.0 \n", "\n", " SibSp Parch Fare Embarked Title IsMother \n", "PassengerId \n", "9 0 2 11.1333 S Mrs 1 \n", "26 1 5 31.3875 S Mrs 1 \n", "99 0 1 23.0000 S Mrs 1 \n", "141 0 2 15.2458 C Mrs 1 \n", "167 0 1 55.0000 S Mrs 1 \n", "168 1 4 27.9000 S Mrs 1 \n", "248 0 2 14.5000 S Mrs 1 \n", "252 1 1 10.4625 S Mrs 1 \n", "255 0 2 20.2125 S Mrs 1 \n", "256 0 2 15.2458 C Mrs 1 \n", "260 0 1 26.0000 S Mrs 1 \n", "269 0 1 153.4625 S Mrs 1 \n", "273 0 1 19.5000 S Mrs 1 \n", "280 1 1 20.2500 S Mrs 1 \n", "300 0 1 247.5208 C Mrs 1 \n", "313 1 1 26.0000 S Mrs 1 \n", "320 1 1 134.5000 C Mrs 1 \n", "324 1 1 29.0000 S Mrs 1 \n", "329 1 1 20.5250 S Mrs 1 \n", "363 0 1 14.4542 C Mrs 1 \n", "395 0 2 16.7000 S Mrs 1 \n", "417 1 1 32.5000 S Mrs 1 \n", "424 1 1 14.4000 S Mrs 1 \n", "438 2 3 18.7500 S Mrs 1 \n", "441 1 1 26.2500 S Mrs 1 \n", "473 1 2 27.7500 S Mrs 1 \n", "499 1 2 151.5500 S Mrs 1 \n", "507 0 2 26.0000 S Mrs 1 \n", "524 0 1 57.9792 C Mrs 1 \n", "534 0 2 22.3583 C Mrs 1 \n", "559 1 1 79.6500 S Mrs 1 \n", "568 0 4 21.0750 S Mrs 1 \n", "582 1 1 110.8833 C Mrs 1 \n", "601 2 1 27.0000 S Mrs 1 \n", "609 1 2 41.5792 C Mrs 1 \n", "611 1 5 31.2750 S Mrs 1 \n", "639 0 5 39.6875 S Mrs 1 \n", "658 1 1 15.5000 Q Mrs 1 \n", "671 1 1 39.0000 S Mrs 1 \n", "679 1 6 46.9000 S Mrs 1 \n", "737 1 3 34.3750 S Mrs 1 \n", "755 1 2 65.0000 S Mrs 1 \n", "764 1 2 120.0000 S Mrs 1 \n", "775 1 3 23.0000 S Mrs 1 \n", "780 0 1 211.3375 S Mrs 1 \n", "800 1 1 24.1500 S Mrs 1 \n", "802 1 1 26.2500 S Mrs 1 \n", "821 1 1 93.5000 S Mrs 1 \n", "824 0 1 12.4750 S Mrs 1 \n", "856 0 1 9.3500 S Mrs 1 \n", "857 1 1 164.8667 S Mrs 1 \n", "859 0 3 19.2583 C Mrs 1 \n", "872 1 1 52.5542 S Mrs 1 \n", "880 0 1 83.1583 C Mrs 1 \n", "881 0 1 26.0000 S Mrs 1 \n", "886 0 5 29.1250 Q Mrs 1 " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[(df['IsMother'] == 1)]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }